#Load the nestling data; the author recorded n=2394 nestlings at this point.
#`header` is written out in full rather than relying on partial matching of `h`.
df <- read.csv("CrowNestlingClimate.csv", header = TRUE)


Anne went through CrowNestlingClimate.csv and identified runts and nestlings with untrustworthy measurements, which she outlined in “SoCalledRuntNestlingsByYear.xlsx”. Below, I checked for the presence of those nestlings and removed them from the df.

#Remove nestlings identified by Anne (runts/or suspect measurements)
#file name: Runt Nestlings so-called by year
#All flagged IDs live in one vector so that the presence check and the removal
#below use exactly the same list and cannot drift apart.
flagged.ids <- c(
  "FFW-S DRIN02", "RFA-S ALOT02", "5 STAD96", "Y-5 ROTA98", "B3 WINK98",
  "B-1   WINK98", "GL JSUP05", "CV WKAY05", "YB-OSW HORC10", "FE WCRI18",
  "Q4 CARE11", "OD CCPL02", "*00 WCRI14", "JC NDYC17",
  "Dead under nest   MIPA14", "L-2 KRUM93", "orange SWEG91", "SWB-W BURP03",
  "3 PINW00", "4 PINW00", "DOA RGBY00", "doa SEPG04", "2-r HORC04",
  "doa CLAR05", "doa HOMC05", "doa JUDD06",
  "doa1 KAYS06", "doa2 KAYS06", "doa3 KAYS06",
  "doa1 BROT07", "doa2 BROT07",
  "doa1 NMUR07", "doa2 NMUR07", "doa3 NMUR07", "doa4 NMUR07", "doa5 NMUR07",
  "X0 YFER00"#identified in DuplicateNestlings file
)

#Presence check: flagged IDs that are in df (removed below) ...
flagged.ids[flagged.ids %in% df$ID]
#... and flagged IDs that are not in df (nothing to remove)
flagged.ids[!flagged.ids %in% df$ID]
#Number of rows per flagged ID that is present
table(df$ID[df$ID %in% flagged.ids])
#Result recorded from the original one-by-one checks:
#  not in df (9): "RFA-S ALOT02", "5 STAD96", "Y-5 ROTA98", "B-1   WINK98",
#                 "CV WKAY05", "YB-OSW HORC10", "OD CCPL02", "*00 WCRI14",
#                 "Dead under nest   MIPA14"
#  in df (28):    all other IDs; "X0 YFER00" occurred twice (2 obs)
#NOTE(review): two of the absent IDs contain runs of three spaces; confirm they
#are truly absent and not just spelled with different whitespace in the csv.

#Remove the flagged rows (28 IDs present, 29 rows).
#IDs that are not in df are simply ignored by %in%.
df <- df %>% filter(!ID %in% flagged.ids)#new n=2365


Below, I identified nestlings that happened to be measured twice (for a variety of reasons). I sent that list to Anne, and she specified which observations to keep in “DuplicateNestlings-ABCAnnotate.xlsx”.

#Find duplicate IDs (every row whose ID occurs more than once)
duplicates.df <- df %>%
  group_by(ID) %>%
  filter(n()>1) %>%
  ungroup()

#list of duplicates sent to Anne
#write.csv(duplicates.df, "DuplicateNestlings.csv")

#Deleting SEPG04 per Anne's request
df <- df %>% filter(NestName != "SEPG04")#4 nestlings (new n=2361)

#Keeping second measurement for GYMN08 per Anne's request
#(drops the GYMN08 rows whose BandDateJul is 126)
df <- subset(df, NestName!="GYMN08" | BandDateJul!="126")#new n=2359

#Update duplicate IDs (new n=38)
duplicates.df <- df %>%
  group_by(ID) %>%
  filter(n()>1) %>%
  ungroup()


#Code check on duplicates.df: sort the remaining duplicates by NestName and
#CalcAge and only retain the first (youngest) observation of each ID.
#arrange() ignores the Year grouping, so the sort is by NestName then CalcAge;
#the grouping only means distinct() is evaluated on Year + ID.
#ungroup() at the end so the result does not stay grouped by Year.
duplicates.df <- duplicates.df %>%
  group_by(Year) %>%
  arrange(NestName, CalcAge) %>%
  dplyr::distinct(ID, .keep_all = TRUE) %>%
  ungroup()#new n=19

#Apply the same rule to df: sort by NestName and CalcAge and only retain the
#first (youngest) observation of each ID
df <- df %>%
  group_by(Year) %>%
  arrange(NestName, CalcAge) %>%
  dplyr::distinct(ID, .keep_all = TRUE) %>%
  ungroup()#new n=2340 (19 fewer than above, so code works)

summary(duplicated(df$ID))#no more duplicates
##    Mode   FALSE 
## logical    2340
#Keep only the analysis variables, in this order, and shorten seven names in
#the same step (new name = old name):
#AgeField -> FieldAge, BillNT -> BNT, BillWidth -> BW, BillDepth -> BD,
#UpperBill -> UB, UBillSurface -> UBS, TotBillSurface -> TBS
df <- select(
  df,
  Year, Name, NestName, ID,
  FieldAge = AgeField,
  CalcAge, HatchDateJul, HatchDateJulYear, AllSex,
  BNT = BillNT, BW = BillWidth, BD = BillDepth,
  TEC, Head,
  UB = UpperBill, UBS = UBillSurface, TBS = TotBillSurface,
  Skull, Tarsus, Weight
)



#Count NAs per column
#vapply() (not sapply()) so the result is always a named integer vector
countNAs <- vapply(df, function(x) sum(is.na(x)), integer(1))
countNAs
##             Year             Name         NestName               ID 
##                0                3                0                0 
##         FieldAge          CalcAge     HatchDateJul HatchDateJulYear 
##              161                3                3                3 
##           AllSex              BNT               BW               BD 
##                0                3                5                4 
##              TEC             Head               UB              UBS 
##                3                4                1                1 
##              TBS            Skull           Tarsus           Weight 
##                0                7                4               10
#Remove NAs: keep only rows where none of these six columns is NA
#(plain filter() conditions replace the superseded filter_at()/all_vars())
df <- df %>%
  filter(
    !is.na(Weight), !is.na(HatchDateJul), !is.na(BD),
    !is.na(Tarsus), !is.na(Skull), !is.na(BW)
  )#new n=2323

#Recount NAs
countNAs <- vapply(df, function(x) sum(is.na(x)), integer(1))
countNAs#still 3 NAs in name for nestlings named "NA"
##             Year             Name         NestName               ID 
##                0                3                0                0 
##         FieldAge          CalcAge     HatchDateJul HatchDateJulYear 
##              159                0                0                0 
##           AllSex              BNT               BW               BD 
##                0                0                0                0 
##              TEC             Head               UB              UBS 
##                0                0                0                0 
##              TBS            Skull           Tarsus           Weight 
##                0                0                0                0
#Change "NA" name to "N_A" so R doesn't cause problems
#(every missing Name is replaced by the string "N_A")
df <- df %>% 
  mutate(Name = coalesce(Name, "N_A"))


#Keep only nestlings with Weight > 160
#(note: a Weight of exactly 160 is dropped as well)
df <- df %>% filter(Weight > 160)#new n=2313
range(df$Weight)
## [1] 163 500
#Keep CalcAge from 24 to 30 days (dplyr's between() includes both end points)
df <- df %>% filter(between(CalcAge, 24,30))#new n=2035
range(df$CalcAge)
## [1] 24.0 29.9
#resort df 
df <- df %>% arrange(Year,NestName,ID)
#Interactive scatter plot of Weight against FieldAge, coloured by HatchDateJul;
#ID is passed as the label aesthetic
WeightByFieldAge.plot <- ggplot(data = df, aes(x=FieldAge,y=Weight,label=ID,color=HatchDateJul))+
  geom_point()
ggplotly(WeightByFieldAge.plot)
#The CalcAge 24-30 filter was repeated here; df is already restricted above,
#so the repeat changed nothing and only the range check is kept.
range(df$CalcAge)
## [1] 24.0 29.9
#Interactive scatter plot of Weight against CalcAge
WeightByCalcAge.plot <- ggplot(data = df, aes(x=CalcAge,y=Weight,label=ID))+
  geom_point()
ggplotly(WeightByCalcAge.plot)
#Load the climate metrics (`header` spelled out instead of the partial `h`)
climate.df <- read.csv("ClimateMetrics.csv", header = TRUE)
#Attach the climate metrics to each nestling by HatchDateJulYear
rows.before.join <- nrow(df)
df <- left_join(df,climate.df, by = "HatchDateJulYear")
#A left join adds rows when a key matches more than one climate row;
#stop here rather than silently duplicating nestlings.
stopifnot(nrow(df) == rows.before.join)
#Move Date so that it sits directly before AllSex
df <- df %>% relocate(Date, .before = AllSex)
#write.csv(df, "AllNestlingsClimateJoined.csv")
#Split df by column position, scale the second part, re-join, and fit the
#bill-depth (BD) model on the result.
#The positions below depend on the column order produced by the earlier
#select/rename, the climate join and the relocate of Date; any change to those
#steps shifts what is and is not scaled.

#variables that don't get scaled (columns 1-10)
#NOTE(review): with the column order built above, columns 1-10 appear to be
#Year, Name, NestName, ID, FieldAge, CalcAge, HatchDateJul, HatchDateJulYear,
#Date, AllSex. CalcAge is therefore NOT scaled although it is a predictor in
#the model below (Weight and the climate terms are) - confirm this is intended.
DataNotScaled.df <- df[,1:10]

#Numerical data that do get scaled (columns 11-39)
#NOTE(review): any column beyond position 39 is dropped here - confirm that
#39 is the last column of df.
DataToScale.df <- df[,11:39]

#Scale those data (scale() defaults: each column centred and divided by its
#standard deviation); the result is a matrix
Scaled.df <- scale(DataToScale.df)

#Rejoin with variables that don't get scaled
#Note: `scaled.df` (data frame) and `Scaled.df` (matrix) differ only by case
scaled.df <- cbind(DataNotScaled.df,Scaled.df)
#Linear model of BD on the GDDSum12_22 x PrecipSum12_22 interaction (main
#effects included), plus Weight and CalcAge
BD.scaled.mdl <- lm(data = scaled.df, BD ~ GDDSum12_22 * PrecipSum12_22 + Weight + CalcAge)
summary(BD.scaled.mdl)
## 
## Call:
## lm(formula = BD ~ GDDSum12_22 * PrecipSum12_22 + Weight + CalcAge, 
##     data = scaled.df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0363 -0.4870 -0.0231  0.4569  3.6824 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                -4.32054    0.42505 -10.165  < 2e-16 ***
## GDDSum12_22                 0.14162    0.01712   8.275 2.30e-16 ***
## PrecipSum12_22             -0.01036    0.01687  -0.614    0.539    
## Weight                      0.57132    0.01809  31.580  < 2e-16 ***
## CalcAge                     0.16540    0.01624  10.187  < 2e-16 ***
## GDDSum12_22:PrecipSum12_22 -0.07457    0.01569  -4.753 2.14e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7515 on 2029 degrees of freedom
## Multiple R-squared:  0.4366, Adjusted R-squared:  0.4352 
## F-statistic: 314.4 on 5 and 2029 DF,  p-value: < 2.2e-16